# Load world oil consumption (thousand barrels daily), one column per year.
oil_consumption1 <- read_excel("data/oil-consumption-per-TBD.xlsx")
# Convert the year columns (2:57) to numeric.
# FIX: the original used apply(), which first coerces the whole data frame to
# a character matrix before looping; lapply() operates column-by-column and
# preserves the data frame structure. Non-numeric cells become NA, so
# "NAs introduced by coercion" warnings are expected here.
vec <- seq(2, 57, 1)
oil_consumption1[vec] <- lapply(oil_consumption1[vec],
                                function(x) as.numeric(as.character(x)))
## Warning: NAs introduced by coercion (one warning per affected column;
## the original run emitted 25 of them)
# Reshape to long format (one row per country/region/year), then back-fill
# the remaining gaps and confirm no NAs are left.
# (The value column keeps the original "comsumption" spelling because all
# downstream code refers to it by that exact name.)
oil_consumption1 <- oil_consumption1 %>%
  pivot_longer(
    -c(country, per_region),
    names_to = "year",
    values_to = "oil_comsumption_in_EJ"
  ) %>%
  fill(oil_comsumption_in_EJ, .direction = "up")
sum(is.na(oil_consumption1))
## [1] 0
# head(oil_consumption1)
# Aggregate yearly world totals, convert thousand barrels daily to million
# barrels daily (/1000), and build an annual time series starting in 1965.
yearly_totals <- oil_consumption1 %>%
  group_by(year) %>%
  summarise(Consumption_in_MBD = sum(oil_comsumption_in_EJ) / 1000)
consumption_ts <- yearly_totals %>%
  dplyr::select(Consumption_in_MBD) %>%
  ts(start = 1965, frequency = 1)
# Interactive plot with a draggable range selector.
dygraph(
  consumption_ts,
  main = "World Oil Consumption",
  ylab = "Consumption in Million Barrels Daily",
  xlab = "Years"
) %>%
  dyRangeSelector()
# Correlation/lag diagnostics on the raw series (annual data, frequency = 1)
ts_cor(consumption_ts)# Illustrating the lag seasonality
# First difference removes the long-run trend so the series is roughly stationary
dyy<- diff(consumption_ts) # taking the first difference of the time series to eliminate the trend and make it stationary
autoplot(dyy)
# Baseline model: seasonal naive on the differenced series.
# NOTE(review): with frequency = 1 snaive() is identical to naive() -- confirm intent.
fit<- snaive(dyy) #Residual SD: 1.9801
print(summary(fit))
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = dyy)
##
## Residual sd: 1.9801
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.2076996 1.980146 1.219474 -10.20389 144.3203 1 -0.105236
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2021 -9.120961 -11.65862 -6.583302 -13.00197 -5.239947
## 2022 -9.120961 -12.70975 -5.532170 -14.60954 -3.632378
# Ljung-Box p-value 0.7479 > 0.05: no evidence of residual autocorrelation
checkresiduals(fit)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 6.7596, df = 10, p-value = 0.7479
##
## Model df: 0. Total lags used: 10
# Exponential smoothing on the (undifferenced) series; ets() auto-selects the
# form -- here M,Ad,N = multiplicative error, damped additive trend, no season.
# NOTE(review): sigma here is a *relative* (multiplicative) error, so it is not
# directly comparable to the additive residual SDs of the other models.
fit_ets<- ets(consumption_ts) #Residual SD: 0.0258
print(summary(fit_ets))
## ETS(M,Ad,N)
##
## Call:
## ets(y = consumption_ts)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.5875
## phi = 0.8619
##
## Initial states:
## l = 33.516
## b = 2.1526
##
## sigma: 0.0257
##
## AIC AICc BIC
## 294.5272 296.2414 306.6793
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03634579 1.851328 1.10657 0.1518389 1.631993 0.6544345
## ACF1
## Training set 0.05214623
# Ljung-Box p-value 0.2625 > 0.05: residuals consistent with white noise
checkresiduals(fit_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,N)
## Q* = 6.477, df = 5, p-value = 0.2625
##
## Model df: 5. Total lags used: 10
# Automatic ARIMA search with first differencing forced (d = 1);
# auto.arima selects ARIMA(0,1,1) with drift by AICc.
fit_arima<- auto.arima(consumption_ts, d=1) #Residual SD: 1.843909
print(summary(fit_arima))
## Series: consumption_ts
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.5424 0.8684
## s.e. 0.1897 0.3753
##
## sigma^2 estimated as 3.4: log likelihood=-110.85
## AIC=227.7 AICc=228.17 BIC=233.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006192155 1.793768 1.129508 0.1226427 1.687617 0.6680002
## ACF1
## Training set 0.007343122
# Ljung-Box p-value 0.9403 > 0.05: residuals consistent with white noise
checkresiduals(fit_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1) with drift
## Q* = 2.9032, df = 8, p-value = 0.9403
##
## Model df: 2. Total lags used: 10
#AR model
# Load VAR / time-series helper packages. The masking messages below are the
# original console output; note MASS::select masks dplyr::select, which is
# why later code qualifies calls as dplyr::select().
library(vars)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: strucchange
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: urca
## Loading required package: lmtest
##
## Attaching package: 'vars'
## The following object is masked from 'package:tidyquant':
##
## VAR
library(mFilter)
library(tseries)
library(TSstudio)
library(forecast)
library(tidyverse)
# Lag-order selection for an autoregression: all four criteria choose 2 lags
VARselect(consumption_ts,lag.max = 10, type = "const")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
##
## $criteria
## 1 2 3 4 5 6 7 8
## AIC(n) 1.369862 1.299587 1.335281 1.378151 1.374155 1.406465 1.447702 1.485097
## HQ(n) 1.399645 1.344262 1.394848 1.452610 1.463505 1.510708 1.566836 1.619123
## SC(n) 1.449368 1.418846 1.494293 1.576917 1.612673 1.684737 1.765726 1.842875
## FPE(n) 3.935022 3.668461 3.802737 3.970983 3.957645 4.091239 4.268548 4.438020
## 9 10
## AIC(n) 1.526807 1.567175
## HQ(n) 1.675725 1.730984
## SC(n) 1.924338 2.004459
## FPE(n) 4.636026 4.838562
# Fit a fixed-order AR(2) to the differenced series (2 = the order chosen by
# VARselect above).
# FIX: ar() has no `p` or `type` arguments -- in the original call both fell
# into `...` and were silently ignored, so the order was actually chosen by
# AIC. Use aic = FALSE with order.max to genuinely fix the order at 2.
AR <- ar(diff(consumption_ts), aic = FALSE, order.max = 2) # Residual SD = 1.916507
print(summary(AR))
# NOTE: the component listing below was captured from the original
# (AIC-selected) run; lengths of `ar`/`aic`/`partialacf` may differ now.
## Length Class Mode
## order 1 -none- numeric
## ar 1 -none- numeric
## var.pred 1 -none- numeric
## x.mean 1 -none- numeric
## aic 18 -none- numeric
## n.used 1 -none- numeric
## n.obs 1 -none- numeric
## order.max 1 -none- numeric
## partialacf 17 -none- numeric
## resid 55 ts numeric
## method 1 -none- character
## series 1 -none- character
## frequency 1 -none- numeric
## call 4 -none- call
## asy.var.coef 1 -none- numeric
# checkresiduals() cannot infer df for an `ar` object, hence the warning below
checkresiduals(AR)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
I have chosen the ETS model since it has the lowest residual standard deviation.
# Forecast the next 10 years from the chosen ETS model
fcs_cons <- forecast(fit_ets, h= 10) # forecasting 10 years
autoplot(fcs_cons)
print(summary(fcs_cons))
##
## Forecast method: ETS(M,Ad,N)
##
## Model Information:
## ETS(M,Ad,N)
##
## Call:
## ets(y = consumption_ts)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.5875
## phi = 0.8619
##
## Initial states:
## l = 33.516
## b = 2.1526
##
## sigma: 0.0257
##
## AIC AICc BIC
## 294.5272 296.2414 306.6793
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03634579 1.851328 1.10657 0.1518389 1.631993 0.6544345
## ACF1
## Training set 0.05214623
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2021 84.05387 81.28489 86.82285 79.81908 88.28866
## 2022 80.24052 75.30143 85.17961 72.68684 87.79421
## 2023 76.95388 69.79343 84.11434 66.00291 87.90486
## 2024 74.12120 64.72408 83.51831 59.74955 88.49284
## 2025 71.67976 60.06634 83.29319 53.91856 89.44097
## 2026 69.57555 55.79087 83.36022 48.49371 90.65739
## 2027 67.76196 51.86680 83.65713 43.45240 92.07153
## 2028 66.19888 48.26311 84.13464 38.76849 93.62926
## 2029 64.85169 44.94975 84.75362 34.41431 95.28907
## 2030 63.69057 41.89822 85.48292 30.36205 97.01909
# Interactive version of the forecast plot (TSstudio); note the point
# forecasts trend *down* because the damped trend latches onto the 2020 drop
plot_forecast(fcs_cons)
As you can see, the forecast was misled by the pandemic-driven 2020 drop. Therefore, I approached the problem from a different side: I removed the 2020 drop (caused by the pandemic) and re-ran the forecast. Let's see what happens.
# Re-load the dataset with the 2020 pandemic drop removed
# (one fewer year column, so the numeric columns are now 2:56).
oil_consumption2 <- read_excel("data/oil-consumption-per-TBD_2.xlsx")
# Convert the year columns to numeric.
# FIX: the original used apply(), which first coerces the whole data frame to
# a character matrix before looping; lapply() operates column-by-column and
# preserves the data frame structure. Non-numeric cells become NA, so
# "NAs introduced by coercion" warnings are expected here.
vec <- seq(2, 56, 1)
oil_consumption2[vec] <- lapply(oil_consumption2[vec],
                                function(x) as.numeric(as.character(x)))
## Warning: NAs introduced by coercion (one warning per affected column;
## the original run emitted 25 of them)
# Reshape to long format (one row per country/region/year).
# (The value column keeps the original "comsumption" spelling because all
# downstream code refers to it by that exact name.)
oil_consumption2 <- oil_consumption2 %>%
  pivot_longer(-c(country, per_region), names_to = "year", values_to = "oil_comsumption_in_MBD")
# Back-fill the remaining missing values (take the next observed value)
oil_consumption2 <- oil_consumption2 %>% fill(oil_comsumption_in_MBD, .direction = "up")
# FIX: verify NAs on oil_consumption2 -- the original checked
# oil_consumption1 (copy-paste error), so the check was vacuous.
sum(is.na(oil_consumption2))
## [1] 0
# Aggregate yearly world totals, convert to million barrels daily (/1000),
# and build an annual time series starting in 1965.
# FIX: removed the stray empty positional argument from the ts() call
# (`ts(, start = ...)`), which silently matched the `end` formal as missing.
consumption_ts1 <- oil_consumption2 %>%
  group_by(year) %>%
  summarise(Consumption_in_MBD = sum(oil_comsumption_in_MBD) / 1000) %>%
  dplyr::select(Consumption_in_MBD) %>%
  ts(start = 1965, frequency = 1)
plot(consumption_ts1)
# Baseline model on the cleaned series: seasonal naive of the first difference.
# NOTE(review): with frequency = 1 snaive() is identical to naive() -- confirm intent.
fit1<- snaive(diff(consumption_ts1)) #Residual SD: 1.5193
print(summary(fit1))
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = diff(consumption_ts1))
##
## Residual sd: 1.5193
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.03323201 1.519344 1.064096 -12.35221 145.0876 1 -0.2414877
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 0.3335225 -1.613595 2.280640 -2.644336 3.311381
## 2021 0.3335225 -2.420117 3.087162 -3.877806 4.544851
# Ljung-Box p-value 0.058: borderline evidence of residual autocorrelation
checkresiduals(fit1)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 17.834, df = 10, p-value = 0.05782
##
## Model df: 0. Total lags used: 10
ETS Model
# Exponential smoothing on the cleaned series; ets() now selects A,Ad,N
# (additive error, damped additive trend, no season), so sigma is an
# additive residual SD and comparable to the other models.
fit_ets1<- ets(consumption_ts1) #Residual SD: 1.3983
print(summary(fit_ets1))
## ETS(A,Ad,N)
##
## Call:
## ets(y = consumption_ts1)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.575
## phi = 0.8
##
## Initial states:
## l = 32.91
## b = 4.6211
##
## sigma: 1.3983
##
## AIC AICc BIC
## 264.0384 265.7884 276.0824
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2675777 1.333213 0.9688798 0.3861036 1.504087 0.6237615
## ACF1
## Training set 0.02412095
# Ljung-Box p-value 0.033 < 0.05: residuals show significant autocorrelation
checkresiduals(fit_ets1)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,N)
## Q* = 12.097, df = 5, p-value = 0.03348
##
## Model df: 5. Total lags used: 10
# Automatic ARIMA search on the cleaned series with d = 1 forced;
# auto.arima selects ARIMA(1,1,1) with drift.
fit_arima1<- auto.arima(consumption_ts1, d=1) #Residual SD: 1.246595
print(summary(fit_arima1))
## Series: consumption_ts1
## ARIMA(1,1,1) with drift
##
## Coefficients:
## ar1 ma1 drift
## -0.3628 0.8637 1.1434
## s.e. 0.1701 0.0954 0.2246
##
## sigma^2 estimated as 1.554: log likelihood=-87.36
## AIC=182.72 AICc=183.53 BIC=190.67
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003135582 1.200341 0.9507481 0.06764132 1.48257 0.6120884
## ACF1
## Training set 0.05812566
# Ljung-Box p-value 0.788 > 0.05: residuals consistent with white noise
checkresiduals(fit_arima1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1) with drift
## Q* = 3.9284, df = 7, p-value = 0.788
##
## Model df: 3. Total lags used: 10
#AR model
# These packages were already attached earlier in the file; the repeated
# library() calls are harmless no-ops kept from the original document.
library(vars)
library(mFilter)
library(tseries)
library(TSstudio)
library(forecast)
library(tidyverse)
# Lag-order selection on the cleaned series: AIC/FPE pick 6 lags, HQ/SC pick 2
VARselect(consumption_ts1,lag.max = 10, type = "const")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 6 2 2 6
##
## $criteria
## 1 2 3 4 5 6 7
## AIC(n) 0.5199406 0.4421863 0.4624644 0.5065880 0.4359571 0.4114818 0.4554496
## HQ(n) 0.5498741 0.4870867 0.5223315 0.5814220 0.5257579 0.5162494 0.5751840
## SC(n) 0.6002367 0.5626305 0.6230566 0.7073283 0.6768454 0.6925182 0.7766341
## FPE(n) 1.6820262 1.5564139 1.5887298 1.6611487 1.5489146 1.5128999 1.5829150
## 8 9 10
## AIC(n) 0.4994564 0.5312357 0.5755493
## HQ(n) 0.6341576 0.6809036 0.7401840
## SC(n) 0.8607889 0.9327163 1.0171779
## FPE(n) 1.6568555 1.7139084 1.7961610
# Fit a fixed-order AR(6) to the differenced cleaned series (6 = the AIC/FPE
# choice from VARselect above).
# FIXES:
#  - ar() has no `p` or `type` arguments; in the original calls they fell
#    into `...` and were silently ignored, so use aic = FALSE + order.max.
#  - The original order-2 fit was dead code: it was overwritten on the very
#    next line, so only the order-6 fit is kept.
#  - summary()/checkresiduals() below were called on the earlier `AR` object
#    (first series) instead of `AR1`; they now diagnose the model just fitted.
AR1 <- ar(diff(consumption_ts1), aic = FALSE, order.max = 6)
print(summary(AR1))
# checkresiduals() cannot infer df for an `ar` object, hence a
# "Could not find appropriate degrees of freedom" warning is expected.
checkresiduals(AR1)
## NOTE: the component listing and warning previously pasted here were
## produced by the old `AR` object and are no longer applicable.
# forecast
# Forecast the next 10 years from the ARIMA(1,1,1)-with-drift model,
# chosen over ETS/snaive on the cleaned series (lowest residual SD above)
fcs_cons1 <- forecast(fit_arima1, h= 10)
autoplot(fcs_cons1)
print(summary(fcs_cons1))
##
## Forecast method: ARIMA(1,1,1) with drift
##
## Model Information:
## Series: consumption_ts1
## ARIMA(1,1,1) with drift
##
## Coefficients:
## ar1 ma1 drift
## -0.3628 0.8637 1.1434
## s.e. 0.1701 0.0954 0.2246
##
## sigma^2 estimated as 1.554: log likelihood=-87.36
## AIC=182.72 AICc=183.53 BIC=190.67
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003135582 1.200341 0.9507481 0.06764132 1.48257 0.6120884
## ACF1
## Training set 0.05812566
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 98.51155 96.91406 100.1090 96.06840 100.9547
## 2021 99.73846 96.85728 102.6196 95.33207 104.1449
## 2022 100.85159 97.28191 104.4213 95.39223 106.3109
## 2023 102.00599 97.80612 106.2059 95.58284 108.4292
## 2024 103.14542 98.41597 107.8749 95.91235 110.3785
## 2025 104.29029 99.07906 109.5015 96.32040 112.2602
## 2026 105.43318 99.78303 111.0833 96.79202 114.0743
## 2027 106.57678 100.51879 112.6348 97.31187 115.8417
## 2028 107.72013 101.28028 114.1600 97.87123 117.5690
## 2029 108.86357 102.06320 115.6639 98.46331 119.2638
# Interactive forecast plot (TSstudio)
plot_forecast(fcs_cons1)
# Flatten the forecast object into a data frame: column 3 holds the
# historical/fitted values, column 4 the point forecasts.
forcast_con1 <- fortify(fcs_cons1)
forcast_con1[1:56, 3]
## [1] 35.88447 37.17609 39.49620 41.41403 44.47001 47.36249 50.65372 52.38458
## [9] 56.36573 59.71718 56.20743 57.55541 60.84182 61.98083 65.66493 64.90642
## [17] 61.27084 60.41140 58.11362 59.36699 60.10207 60.47672 62.95522 63.45150
## [25] 66.43129 66.41210 67.65394 66.97134 69.04226 67.50697 71.25237 70.56000
## [33] 73.77546 74.79043 75.12704 77.31022 77.08545 78.84964 78.95014 81.68998
## [41] 84.17259 84.48699 86.01142 86.90470 84.95955 84.55628 88.81948 87.62523
## [49] 90.62271 90.49744 92.14051 94.13754 95.57062 97.49088 98.20498 NA
# FIX: splice the historical and forecast columns with coalesce() instead of
# hard-coding a 1:56 / 57:n split. This survives a change in series length
# and repairs the off-by-one that left row 56 (the 2020 point forecast) NA.
consumption_df1 <- data.frame(
  date = forcast_con1$Index,
  consumption = dplyr::coalesce(forcast_con1[[3]], forcast_con1[[4]])
)
model <- consumption_df1
Now let's add the 2020 drop back in.